set.seed(1) 
library(splines)
library(ggplot2)
library(ggtree)
#> ggtree v3.2.1  For help: https://yulab-smu.top/treedata-book/
#> 
#> If you use ggtree in published research, please cite the most appropriate paper(s):
#> 
#> 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
#> 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
#> 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
library(reshape2)
library(CostOfResistance)
#> Warning: replacing previous import 'ape::drop.tip' by 'treeio::drop.tip' when
#> loading 'CostOfResistance'
library(ape)
#> 
#> Attaching package: 'ape'
#> The following object is masked from 'package:ggtree':
#> 
#>     rotate
library(posterior)
#> This is posterior version 1.2.1
#> 
#> Attaching package: 'posterior'
#> The following objects are masked from 'package:stats':
#> 
#>     mad, sd, var
library(cmdstanr)
#> This is cmdstanr version 0.5.2
#> - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
#> - CmdStan path: /home/david/.cmdstan/cmdstan-2.30.1
#> - CmdStan version: 2.30.1
#> 
#> A newer version of CmdStan is available. See ?install_cmdstan() to install it.
#> To disable this check set option or environment variable CMDSTANR_NO_VER_CHECK=TRUE.
library(patchwork)
library(viridis)
#> Loading required package: viridisLite
library(RColorBrewer)
library(latex2exp)

run_mcmc <- F

Load tree from Yonathan Grad’s paper, do some boring bioinformatics stuff

Honestly wish my computer could just read my mind and do what I want, automatically

tr <- read.tree(system.file("extdata", "grad2016.nwk", package="CostOfResistance", mustWork=T))
tr <- makeNodeLabel(tr)
meta <- read.table(system.file("extdata", "grad2016.tab", package="CostOfResistance", mustWork=T),
            sep='\t',comment.char='',header=T,as.is=T)
rownames(meta) <- meta$ID
cutoffs <- read.csv(system.file("extdata", "cutoffs.csv", package="CostOfResistance", mustWork=T))

Dichotomise Resistance data

rdf <- data.frame(ID=meta$ID)
rownames(rdf) <- rownames(meta)

abx_names <- data.frame(cutoff_name = c("CFX", "CIP", "CRO", "AZI", "PEN", "TET", "SPC"),
    meta_name=c("CFX", "CIP", "CRO", "AZI", "PEN", "TET", "SPC")
)

for (s in c(1:nrow(abx_names))) {
    rdf[[abx_names$cutoff_name[s]]] <- (meta[[abx_names$meta_name[s]]] > cutoffs[[abx_names$cutoff_name[s]]][2])
}

rdf<-within(rdf, rm(ID))

Select the three lineages of interest

CL_1 <- extract.clade(tr, "Node129")$tip.label
CL_2 <- extract.clade(tr, "Node580")$tip.label
CL_3 <- extract.clade(tr, "Node1038")$tip.label

tr_r <- keep.tip(tr, c(CL_1,CL_2,CL_3))

#Remove CFX resistant subclade
tr_r <- drop.tip(tr_r, extract.clade(tr_r, "Node732")$tip.label)

tr_r <- ladderize(tr_r)

p <- ggtree(tr_r) +  geom_nodelab(geom='text',size=4) +
    theme_tree2()
gheatmap(p, rdf, offset=8, width=0.6, 
        colnames=FALSE, legend_title="Resistance")+
        scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.

We are interested in the two lineages within the subtree with root at Node125

cl_dat <- data.frame(node=nodeid(tr_r, c("Node580", "Node129", "Node1038")), name=c("FQ Susceptible", "FQ Resistant 1","FQ Resistant 2"))
mrsd_full <- max(meta[tr_r$tip.label, "Year"])

p <- ggtree(tr_r, mrsd=paste0(mrsd_full,"-01-01")) + 
    geom_cladelab(
        data = cl_dat,
        mapping = aes(
            node = node, 
            label = name, 
            color = name
        ),
        fontsize = 3,
        angle=90,
        align = TRUE,
        vjust = 0.0,
        hjust = "center",
        offset= 10,
        lwd=4.0,
        show.legend = FALSE
    ) + 
    scale_color_brewer(palette="Dark2") +
    theme_tree2()
h1 <- gheatmap(p, rdf[,c("CIP", "AZI", "PEN", "TET")], width=0.2, colnames=FALSE, legend_title="Resistance")+
        scale_fill_manual(values=c("blue", "red")) + 
        guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
        scale_x_ggtree() +
        theme(legend.position="bottom",
        axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.

h1

pdf("Figures/gono_tree_90.pdf",8,8)
plot(h1)
dev.off()
#> png 
#>   2

Node580 corresponds to FQ susceptible, Node129 is FQ resistant Extract the two lineages, filter out tips with missing FQ profiles,or that are FQ resistant and in the susceptible clade and vice versa Further remove all tips that are CIP resistant (mostly a small subclade in the susceptible tree)

sus_tr <- extract.clade(tr_r, "Node580")
res_tr_1 <- extract.clade(tr_r, "Node129")
res_tr_2 <- extract.clade(tr_r, "Node1038")

rdf_sus <- rdf[sus_tr$tip.label,]
rdf_res_1 <- rdf[res_tr_1$tip.label,]
rdf_res_2 <- rdf[res_tr_2$tip.label,]

tip_times_sus <- meta[rownames(rdf_sus),]$Year
tip_times_res_1 <- meta[rownames(rdf_res_1),]$Year
tip_times_res_2 <- meta[rownames(rdf_res_2),]$Year

mrsd_sus <- max(tip_times_sus)
mrsd_res_1 <- max(tip_times_res_1)
mrsd_res_2 <- max(tip_times_res_2)

Plot the filtered subtrees

p1 <- ggtree(sus_tr, mrsd=paste0(mrsd_sus,"-01-01")) +
    geom_tiplab(size=1, align=TRUE, linesize=.5)+
    scale_x_ggtree() + 
    theme_tree2()
gheatmap(p1, rdf, offset=8, width=0.6, 
    colnames=FALSE, legend_title="Resistance")+
    scale_fill_manual(values=c("blue", "red"))+
    labs(title="Susceptible Tree") +
    guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
    scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.

p2 <-ggtree(res_tr_1, mrsd=paste0(mrsd_res_1,"-01-01"))+theme_tree2() +
    geom_tiplab(size=1, align=TRUE, linesize=.5)+
    scale_x_ggtree() + 
    theme_tree2()
gheatmap(p2, rdf, offset=8, width=0.6, 
        colnames=FALSE, legend_title="Resistance")+
        scale_fill_manual(values=c("blue", "red"))+
        labs(title="Resistant Tree") +
        guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
        scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.

p2 <-ggtree(res_tr_2, mrsd=paste0(mrsd_res_2,"-01-01"))+theme_tree2() +
    geom_tiplab(size=1, align=TRUE, linesize=.5)+
    scale_x_ggtree() + 
    theme_tree2()
gheatmap(p2, rdf, offset=8, width=0.6, 
        colnames=FALSE, legend_title="Resistance")+
        scale_fill_manual(values=c("blue", "red"))+
        labs(title="Resistant Tree") +
        guides(color = "none", fill=guide_legend("MIC >= Cutoff")) +
        scale_x_ggtree()
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.

Set the analysis end date, move tips into the middle of the year as opposed to the beginning by offsetting most recent sampling date by +0.5

t_end <- max(mrsd_res_1, mrsd_res_2, mrsd_sus)+1 
mr_sus <- t_end-mrsd_sus-0.5
mr_res_1 <- t_end-mrsd_res_1-0.5
mr_res_2 <- t_end-mrsd_res_2-0.5

print(t_end)
#> [1] 2014

Load and preprocess usage data

pl_begin <- 1988
# AZI, PEN, TET, CEF, FQ
dat <- system.file("extdata", "usage-clean.csv", package = "CostOfResistance", mustWork = TRUE)
abx_raw <- as.matrix(read.csv(dat, header=F))
abx_df <- as.data.frame(t(abx_raw))
colnames(abx_df) <- c("year",abx_df[1,2:14])
abx_df <- abx_df[-1,]
rownames(abx_df) <- as.integer(abx_df[,1])
abx_df$year <- as.integer(abx_df$year)
abx_df$FQ <- as.numeric(abx_df[, "Ciprofloxacin"]) + as.numeric(abx_df[, "Ofloxacin"])
abx_df$CPH <- as.numeric(abx_df[, "Cefixime"]) + as.numeric(abx_df[, "Ceftriaxone 250 mg"]) + as.numeric(abx_df[, "Ceftriaxone 125 mg"]) +as.numeric(abx_df[, "Other Cephalo."]) 

abx_df$Other <- 100-as.numeric(abx_df[, "FQ"]) - 
                as.numeric(abx_df[, "CPH"]) -
                as.numeric(abx_df[, "Penicillins"]) -
                as.numeric(abx_df[, "Azithromycin 2gm"]) -
                as.numeric(abx_df[, "Tetracyclines"])

usage_f_fq <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"FQ"]), c(pl_begin:t_end))
usage_f_pen <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Penicillins"]), c(pl_begin:t_end))
usage_f_tet <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Tetracyclines"]), c(pl_begin:t_end))
usage_f_azi <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Azithromycin 2gm"]), c(pl_begin:t_end))
usage_f_CPH <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"CPH"]), c(pl_begin:t_end))
usage_f_other <- yearly_usg_stepfunc(as.numeric(abx_df[sapply(c(pl_begin:t_end), paste0),"Other"]), c(pl_begin:t_end))

time_grid <- seq(from=pl_begin, to=t_end, length.out=4000)
fq_df <- data.frame(year=rep(time_grid,6), 
                    usage=c(sapply(time_grid,usage_f_fq),
                            sapply(time_grid,usage_f_pen),
                            sapply(time_grid,usage_f_tet),
                            sapply(time_grid,usage_f_azi),
                            sapply(time_grid,usage_f_CPH),
                            sapply(time_grid,usage_f_other)),
                    abx=c(rep("Fluoroquinolones",length(time_grid)), 
                          rep("Penicillins", length(time_grid)), 
                          rep("Tetracyclines",length(time_grid)),
                          rep("Azithromycin", length(time_grid)),
                          rep("Cephalosporins", length(time_grid)),
                          rep("Other", length(time_grid))))

Plot usage data, plot individual areas for AMRs that the lineages carry resistance against

p <- ggplot(fq_df, aes(x=year, y=usage, fill=factor(abx, levels=rev(c("Fluoroquinolones", "Penicillins","Tetracyclines", "Cephalosporins", "Azithromycin", "Other"))))) +
    geom_area() +
    scale_x_continuous(breaks = seq(pl_begin, t_end, by =1)) + 
    labs(x="Year", y="Average Usage as Proportion of Primary Treatment") +
    guides(fill = guide_legend(title="Antimicrobial",reverse=F)) +
    scale_fill_viridis(discrete=T) +
    scale_y_continuous(labels = scales::percent_format(scale = 1), breaks=seq(from=0,to=100, by=10)) +
    geom_vline(xintercept=1995, linetype="longdash", color="gray70",alpha=.8)+
    geom_text(aes(x=1995, label="\nAnalysis Start Date", y=40), size=rel(4.0), colour="gray70", alpha=.8, angle=90)+
    theme_minimal() +
    theme(axis.text.x=element_text(size=rel(0.7), angle = 45, vjust=0.5, hjust=0.5),
        axis.text.y=element_text(size=rel(0.7), hjust=1),
        axis.title.y=element_text(size=rel(0.6)),
        axis.title.x=element_text(size=rel(0.6)),
        aspect.ratio=1,
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(size=rel(0.2), colour = "grey80"),
        legend.position="right",
        legend.justification="left",
        legend.margin=margin(0,0,0,0),
        legend.box.margin=margin(0,0,0,-10))

pl1 <- h1 + annotation_custom(
    grob = ggplotGrob(p),
    xmin = 1795,
    xmax = 1995,
    ymin = 190,
    ymax = 410)

pdf("Figures/gono_data.pdf",6,6)
pl1
dev.off()
#> png 
#>   2

We can see that PEN + TET was used in significant quantities before ~1995. The FQ resistant lineage carries resistance against these as well, but susceptible clade does not. Set the analysis start date to 1995. Subset FQ usage data for the given timespan.

t_begin <- 1995
usage_df <- data.frame(year=c(t_begin:t_end), usage=as.numeric(abx_df[sapply(c(t_begin:t_end), paste0),"FQ"]))
usage_df$usage <- usage_df$usage/100

Perform Inference

gamma_ansatz <- 1/90.0
time_scale <- 365.0
t0 <- t_begin #0
tmax <- t_end #t_end - t_begin
usage_df$time #<- usage_df$time - t_begin
#> NULL
stopifnot(all(usage_df$time<=tmax))
stopifnot(all(usage_df$time>=t0))

Sample

out <- infer_costs2(list(sus_tr, res_tr_1, res_tr_2), 
                    list(mr_sus,mr_res_1, mr_res_2),
                    usage_df$usage,
                    usage_df$year,
                    t0,
                    tmax,
                    gamma_ansatz,
                    time_scale,
                    n_iter=2000, 
                    n_warmup=2000,
                    model="deterministic",
                    K=60,
                    L=6.5,
                    seed=1345678,
                    gamma_log_sd = 0.1,
                    stan_control=list(adapt_delta=.99,
                        max_treedepth=13,
                        parallel_chains=4,
                        chains=4,
                        refresh=1000
                    ))
saveRDS(out, "~/gono_90s.rds")

Load existing analysis result

out <- readRDS("~/gono_90s.rds")

Check convergence

print(out$converged)
#> [1] TRUE

Inferred dynamics & Rt

source("Figures/figure3.R")
pdf("Figures/figure3_90.pdf",6,6)
plot(fig3)
dev.off()
#> png 
#>   2
fig3

Pairs Plot

fig4 <- plot_qpairs(out)
pdf("Figures/figure4_90.pdf",6,6)
plot(fig4)
dev.off()
#> png 
#>   2
fig4

RR map

source("Figures/figure5.R")
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8922 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8922 rows containing non-finite values (stat_contour_filled).
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Scale for 'y' is already present. Adding another scale for 'y', which will
#> replace the existing scale.
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8856 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8856 rows containing non-finite values (stat_contour_filled).
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
#> Scale for 'x' is already present. Adding another scale for 'x', which will
#> replace the existing scale.
pdf("Figures/figure5_90.pdf",6,6)
plot(fig5)
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
dev.off()
#> png 
#>   2
fig5
#> Warning: Removed 8544 rows containing non-finite values (stat_contour_filled).
#> Removed 8544 rows containing non-finite values (stat_contour_filled).

Traces & ppcheck

s4 <- plot_traces(out)
pdf("Figures/s4_90.pdf",12,12)
plot(s4)
dev.off()
#> png 
#>   2
s4

s5a <- plot_ppcheck_At(out, 1)
s5b <- plot_ppcheck_At(out, 2)
s5c <- plot_ppcheck_At(out, 3)
pdf("Figures/s5_90.pdf",6,6)
plot(s5a)
plot(s5b)
plot(s5c)
dev.off()
#> png 
#>   2
s5a

s5b

s5c

Hyppeparameter pairs

s6 <- plot_hyperpar_pairs(out)
pdf("Figures/s6_90.pdf",6,6)
plot(s6)
dev.off()
#> png 
#>   2
s6

s7a <- plot_epi_dynamic(out,1)
s7b <- plot_rt(out,1)
pdf("Figures/s7_90.pdf",6,6)
plot(s7a)
plot(s7b)
dev.off()
#> png 
#>   2
s7a

s7b